* Merge different waves from the BHPS

********************************************************************************
* SETUP WORKSPACE **************************************************************
* Wildcard patterns for the variables that are retained from the wave-specific
* (substantive) files in the wave loop further down.
* Variables in those files still carry their wave prefix (b + wave letter + _)
* when the patterns are applied, so every pattern starts with * in order to
* match the prefixed name.
* Fixed: pagold*, jbsem* and hscnt* lacked the leading * and therefore could
* not match any wave-prefixed variable.
global keepvarlist *pid* *hid* *hhorig *age* *dob* *finnow *fisit* *fiy* *opfam* *oppol* *oprlg*  ///
		*tenure *hsown* *hsval *opcls2 *jb* *istrtdatm *isco *jsbos* *jssiz* ///
		*vote* *opsoc* *ypopf* *ypopscb* *ypvte* *opcls3 *ophl* *opeur* *opnat* ///
		*ypvt11* *ypjbqd* *ypopsc* *ypsoc*  *pagold* *jbso* *spso* ///
		*qfachi* *qfedhi* *feend* *casmin* *isced* *maedhi* *paedhi* /// education
		*jbhgs* *jlhgs* *mrjhgs* *j1hgs* *j1rg* *jbgol* *jlgol* *mrjgol* *jbsta* *jbsem* /// status variables
		*prosc* *jbsat* *jbsec* *futre* ///
		*lrwght *lewght /// weights
		*xphp* *hscan* *hscnt* *hhsize* *hhtype* *region* *ladist* *fihh* *hhyneti // household file: income, size and "can't afford" type questions

********************************************************************************
* BUILD COMPLETE DATASET OF ALL RESPONDENTS ************************************

* Starting point is xwavedat, the cross-wave file with the stable
* characteristics of every individual ever enumerated in BHPS.
use $data/us_wx/xwavedat.dta, clear

* Discard rows flagged xwdat_dv==1 (per the author's note: respondents who
* appear only in UKHLS)
keep if xwdat_dv != 1

* Bring in birth year and month from the original BHPS cross-wave ID file and
* place the month directly after the year
merge m:1 pid using $data/bhps_wx/xwaveid_bh.dta, keepusing(birthy birthm) nogenerate
order birthm, after(birthy)

* Restrict to hhorig==3 (per the author's note: original sample members)
drop if hhorig != 3

* Turn the cross-section into a person-wave panel: 18 rows per person,
* numbered 1..18 (waven) and mapped to calendar years 1991..2008 (year).
expand 18
bys pidp: gen year = 1990 + _n
bys pidp: gen waven = _n
* Label the numeric indicator by its real name. The string variable wave does
* not exist yet at this point; labelling "wave" here only worked through
* variable-name abbreviation and fails when abbreviation is switched off.
label variable waven "BHPS Wave indicator, numerical"
* Declare the panel structure. tsfill, full would add rows for any missing
* person-year; after expand every person already has all 18 years.
xtset pidp year, yearly
tsfill, full
* Stop here if person-year does not uniquely identify the rows
isid pidp year

* String wave indicator that matches the prefixes used in the original data:
* wave 1 is "ba", wave 2 is "bb", and so on up to wave 18 = "br".
gen wave = ""
label variable wave "BHPS/UKHLS Wave indicator"
forvalues w = 1/18 {
	// the w-th letter of the alphabet string is the wave letter
	local letter = substr("abcdefghijklmnopqr", `w', 1)
	replace wave = "b`letter'" if waven == `w'
}

* Store the person-wave skeleton; it is merged with the appended wave data
* later in this file.
* The path uses a forward slash (as everywhere else in this file) so that it
* also works outside Windows and names the same file that is read back later;
* the quotes guard against blanks in the workspace path.
compress
save "$workspace/metadata.dta", replace

* Substantive data: stack the individual-response files of all waves here;
* the stack is merged with the metadata skeleton after the loop.
set more off, permanently
clear all

* BHPS data, Adults
* Waves are processed from the last (18 = r) down to the first (1 = a).
* Author's rationale: some variables become more detailed over time, and
* going backward is meant to take the value labels from the last wave.
forvalues wavenum = 18(-1)1 {
	// wave letter belonging to this wave number
	local letter = substr("abcdefghijklmnopqr", `wavenum', 1)
	display "*** `letter' ***"

	// set the stack built so far aside while this wave is prepared
	preserve
	use $data/bhps_w`wavenum'/b`letter'_indresp.dta, clear

	// identify the source of each record
	gen wave = "b`letter'"
	gen dataset = "BHPS"
	gen record = "Adult"

	// attach household-level responses; household rows without a matching
	// individual record are not kept
	merge m:1 b`letter'_hid using $data/bhps_w`wavenum'/b`letter'_hhresp.dta, keep(master match) nogenerate

	// collect every variable that matches one of the keep patterns;
	// ds is run under capture because a pattern may match nothing in a wave,
	// and repeated names are removed afterwards
	local keepers
	foreach pattern of global keepvarlist {
		capture ds `pattern'
		local keepers `keepers' `r(varlist)'
	}
	local keepers : list uniq keepers
	keep wave dataset record `keepers'
	display "`keepers'"

	// strip the wave prefix so variable names line up across waves
	rename b`letter'_* *

	// park the prepared wave in a temporary file, bring the stack back
	// and add the wave to it
	tempfile wavefile
	save `wavefile', replace
	restore

	append using `wavefile'
}

	* Combine the stacked wave data with the person-wave skeleton.
	* keep(match using) retains every skeleton row (also person-waves without a
	* wave record) and drops wave records of people who are not in the skeleton.
	merge 1:1 pidp wave using "$workspace/metadata.dta", nogen keep(match using)

	* Final layout: identifiers and wave information first, rows by person-year
	order pid year pidp wave waven dataset record, first
	sort pid year

	* Validate BEFORE writing: the output file is only created when pid-year
	* uniquely identifies the rows (previously the file was saved once before
	* this check and then a second time after it).
	isid pid year

	compress
	save "$workspace/data/ready4wrangling.dta", replace

	* Remove the intermediate skeleton only after the final file has been written
	erase "$workspace/metadata.dta"
